#############################
# Panel model analysis
#
# Last updated: Seth, 9-12-25
#############################

library(haven)
library(dplyr)
library(lubridate)
library(plm)
library(sandwich)
library(lmtest)
library(ggplot2)
library(forcats)
library(broom)
library(purrr)
library(htmlTable)
library(htmltools)

anes <- read_dta("C:/Users/Seth/OneDrive/Research/data/anes/anes_cumulative.dta")
anes20 <- read_dta("C:/Users/Seth/OneDrive/Research/data/anes/anes20.dta")
anes24 <- read_dta("C:/Users/Seth/OneDrive/Research/data/anes/anes24.dta")


###

# 1. Data cleaning

# Prep basic variables
anes$year <- anes$VCF0004
anes$weight <- anes$VCF0009z
anes24$year <- 2024
anes24$weight <- 1
anes24$VCF0006a <- anes24$V240001

# Partisanship and affect
anes$pid <- ifelse(anes$VCF0301<1,NA,anes$VCF0301)
anes$strength_pid <- abs(anes$pid - 4)
anes$dem <- ifelse(anes$pid < 4, 1, 0)
anes$rep <- ifelse(anes$pid > 4, 1, 0)
anes$ft_dem <- ifelse(anes$VCF0218>97,0,anes$VCF0218)/97
anes$ft_rep <- ifelse(anes$VCF0224>97,0,anes$VCF0224)/97
anes$cses_dem <- ifelse(anes$VCF9201<0,0,anes$VCF9201)/10
anes$cses_rep <- ifelse(anes$VCF9202<0,0,anes$VCF9202)/10
anes$inpartyft <- ifelse(anes$pid>4,anes$ft_rep,ifelse(anes$pid<4,anes$ft_dem,NA))
anes$outpartyft <- ifelse(anes$pid<4,anes$ft_rep,ifelse(anes$pid>4,anes$ft_dem,NA))
anes$pre_affpol <- anes$inpartyft - anes$outpartyft
anes$inpartycses <- ifelse(anes$pid>4,anes$cses_rep,ifelse(anes$pid<4,anes$cses_dem,NA))
anes$outpartycses <- ifelse(anes$pid<4,anes$cses_rep,ifelse(anes$pid>4,anes$cses_dem,NA))
anes$post_affpol <- anes$inpartycses - anes$outpartycses
anes$pre_days <- ifelse(anes$VCF1015 != 99, anes$VCF1015, NA)
anes$post_days <- ifelse(anes$VCF1016 != 99, anes$VCF1016, NA)

anes24$pid <- ifelse(anes24$V241227x < 0, NA, anes24$V241227x)
anes24$strength_pid <- abs(anes24$pid - 4)
anes24$dem <- ifelse(anes24$pid < 4, 1, 0)
anes24$rep <- ifelse(anes24$pid > 4, 1, 0)
anes24$ft_dem <- ifelse(anes24$V241166 > 97, 97,
                        ifelse(anes24$V241166 < 0 , NA, anes24$V241166))/97
anes24$ft_rep <- ifelse(anes24$V241167 > 97, 97,
                        ifelse(anes24$V241167 < 0 , NA, anes24$V241167))/97
anes24$cses_dem <- ifelse(anes24$V242432 > 10, NA, ifelse(anes24$V242432 < 0, NA, anes24$V242432))/10
anes24$cses_rep <- ifelse(anes24$V242433 > 10, NA, ifelse(anes24$V242433 < 0, NA, anes24$V242433))/10
anes24$inpartyft <- ifelse(anes24$pid>4,anes24$ft_rep,ifelse(anes24$pid<4,anes24$ft_dem,NA))
anes24$outpartyft <- ifelse(anes24$pid<4,anes24$ft_rep,ifelse(anes24$pid>4,anes24$ft_dem,NA))
anes24$pre_affpol <- anes24$inpartyft - anes24$outpartyft
anes24$inpartycses <- ifelse(anes24$pid>4,anes24$cses_rep,ifelse(anes24$pid<4,anes24$cses_dem,NA))
anes24$outpartycses <- ifelse(anes24$pid<4,anes24$cses_rep,ifelse(anes24$pid>4,anes24$cses_dem,NA))
anes24$post_affpol <- anes24$inpartycses - anes24$outpartycses
election_day <- as.Date("2024-11-05")
anes24 <- anes24 %>%
  mutate(
    # Convert to Date, handle blanks like "         ."
    pre_date  = suppressWarnings(as.Date(V243050, format = "%Y%m%d")),
    post_date = suppressWarnings(as.Date(V243053, format = "%Y%m%d")),
    # Calculate days difference from Election Day
    pre_days  = abs(as.numeric(pre_date  - election_day)),
    post_days = abs(as.numeric(post_date - election_day))
  )


###

# 2. Create analysis dataset

# Create long-format survey data

# Define variables to keep
vars_to_keep <- c(
  "year", "weight", "VCF0006a", "pid", "strength_pid", "dem", "rep", 
  "ft_dem", "ft_rep", "cses_dem", "cses_rep", "inpartyft", "outpartyft",
  "pre_affpol", "inpartycses", "outpartycses", "post_affpol", "pre_days", "post_days"
)

# Subset both dataframes to these variables
anes_subset <- anes[, intersect(names(anes), vars_to_keep)]
anes24_subset <- anes24[, intersect(names(anes24), vars_to_keep)]

# Bind the rows together
analysis <- rbind(anes_subset, anes24_subset)

# Make it into panel long format
analysis_pre <- analysis %>% mutate(wave = "aapre-election", days = pre_days)
analysis_post <- analysis %>% mutate(days = post_days)

dem_wins <- c(1964, 1976, 1992, 1996, 2008, 2012, 2020)
rep_wins <- c(1968, 1972, 1980, 1984, 1988, 2000, 2004, 2016, 2024)

analysis_post <- analysis_post %>%
  mutate(
    wave = case_when(
      year %in% dem_wins & dem == 1 ~ "won",
      year %in% dem_wins & rep == 1 ~ "lost",
      year %in% rep_wins & rep == 1 ~ "won",
      year %in% rep_wins & dem == 1 ~ "lost",
      TRUE ~ NA_character_
    )
  )

analysis_long <- rbind(analysis_pre, analysis_post)

# Clean dataset

# Set partisan affect to wave-specific measure

analysis_long <- analysis_long %>%
  mutate(
    love = case_when(
      wave == "aapre-election" ~ inpartyft,
      wave %in% c("won", "lost") ~ inpartycses,
      TRUE ~ NA_real_
    ),
    outparty_love = case_when(
      wave == "aapre-election" ~ outpartyft,
      wave %in% c("won", "lost") ~ outpartycses,
      TRUE ~ NA_real_
    ),
    affpol = case_when(
      wave == "aapre-election" ~ pre_affpol,
      wave %in% c("won", "lost") ~ post_affpol,
      TRUE ~ NA_real_
    )
  )

# Standardize for supplementary analysis
analysis_long$pre_election <- ifelse(analysis_long$wave == "aapre-election", 1, 0)
analysis_long <- analysis_long %>%
  group_by(pre_election, year) %>%
  mutate(
    z_love   = scale(love)[,1],
    z_outparty_love   = scale(outparty_love)[,1],
    z_affpol = scale(affpol)[,1]
  ) %>%
  ungroup()

# Remove pre-1996 and NA obs
analysis_long <- analysis_long[!is.na(analysis_long$love),]
all_years <- c(dem_wins,rep_wins)
analysis_long <- analysis_long[analysis_long$year %in% all_years,]
analysis_long <- analysis_long %>%
  filter(!is.na(love)) %>%
  filter(year %in% c(dem_wins, rep_wins)) %>%
  group_by(VCF0006a) %>%
  slice_head(n = 2) %>% # limit panelized respondents to first appearance
  ungroup()


###

# 3. Run preferred analysis

# Model 1: Love
model_love <- plm(
  love ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
# Cluster-robust SEs (clustered by individual)
love_vcov <- vcovHC(model_love, method = "arellano", type = "HC0", cluster = "group")
# Print robust coefficients
coeftest(model_love, vcov = love_vcov)

# Model 2: Hate
model_hate <- plm(
  outparty_love ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  effect = 'individual',
  index = c("VCF0006a"),
  model = "within"
)
hate_vcov <- vcovHC(model_hate, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_hate, vcov = hate_vcov)

# Model 3: Affective Polarization
model_affpol <- plm(
  affpol ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  effect = 'individual',
  index = c("VCF0006a"),
  model = "within"
)
affpol_vcov <- vcovHC(model_affpol, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_affpol, vcov = affpol_vcov)


# Summaries
summary(model_love)
cor(predict(model_love, model_love$model), model_love$model$love) ^ 2

summary(model_hate)
cor(predict(model_hate, model_hate$model), model_hate$model$outparty_love) ^ 2

summary(model_affpol)
cor(predict(model_affpol, model_affpol$model), model_affpol$model$affpol) ^ 2


###

# 4. Question wording mapping analysis


# a. Mapping based on 2020 FT and CSES candidate questions

# code variables
clamp01 <- function(z) pmin(pmax(z, 0), 1)
anes20 <- anes20 %>%
  mutate(
    bidenfeel_post = ifelse(V202143 >= 0, V202143, NA_real_),
    trumpfeel_post = ifelse(V202144 >= 0, V202144, NA_real_),
    bidenfeel_norm = pmin(bidenfeel_post, 97) / 97,
    trumpfeel_norm = pmin(trumpfeel_post, 97) / 97,
    # Candidate like/dislike (0-10), /10; sub-zero coded values -> NA
    bidenlike_norm = ifelse(V202435 >= 0, V202435 / 10, NA_real_),
    trumplike_norm = ifelse(V202436 >= 0, V202436 / 10, NA_real_)
  )

# linear function
map_df <- bind_rows(
  transmute(anes20, ft = bidenfeel_norm, like = bidenlike_norm, target = "Biden"),
  transmute(anes20, ft = trumpfeel_norm, like = trumplike_norm, target = "Trump")
) %>%
  filter(!is.na(ft), !is.na(like))

map_lm <- lm(like ~ ft, data = map_df)

a_hat  <- unname(coef(map_lm)[1])
b_hat  <- unname(coef(map_lm)[2])
map_linear_fun <- function(x_like_0to1) clamp01(a_hat + b_hat * x_like_0to1)

# percentile-based function
ecdf_ft   <- ecdf(map_df$ft)
probs_grid  <- seq(0, 1, by = 0.001)
q_like_grid <- quantile(map_df$like, probs = probs_grid, na.rm = TRUE, type = 7)

map_percentile_fun_rev <- function(x_ft_0to1) {
  ifelse(
    is.na(x_ft_0to1),
    NA_real_,
    {
      # Get percentile of FT value in its distribution
      p <- ecdf_ft(x_ft_0to1)
      p <- clamp01(p)
      
      # Map that percentile to the like (0–10) distribution
      as.numeric(approx(x = probs_grid, y = q_like_grid, xout = p,
                        rule = 2, ties = "ordered")$y)
    }
  )
}

# recode variables in analysis_long
analysis_long <- analysis_long %>%
  mutate(
    # Linear mapping: keep as you wrote it
    love_linear = case_when(
      wave == "aapre-election"   ~ inpartyft,
      wave %in% c("won","lost") ~ map_linear_fun(inpartycses),
      TRUE ~ NA_real_
    ),
    outparty_linear = case_when(
      wave == "aapre-election"   ~ outpartyft,
      wave %in% c("won","lost") ~ map_linear_fun(outpartycses),
      TRUE ~ NA_real_
    ),
    post_affpol_linear = ifelse(wave %in% c("won","lost"),
                                love_linear - outparty_linear, NA_real_),
    
    # Percentile mapping: FT → 0–10 scale
    love_pct = case_when(
      wave == "aapre-election"   ~ map_percentile_fun_rev(inpartyft),
      wave %in% c("won","lost") ~ inpartycses,   # already on 0–10
      TRUE ~ NA_real_
    ),
    outparty_pct = case_when(
      wave == "aapre-election"   ~ map_percentile_fun_rev(outpartyft),
      wave %in% c("won","lost") ~ outpartycses,  # already on 0–10
      TRUE ~ NA_real_
    ),
    post_affpol_pct = ifelse(wave %in% c("won","lost"),
                             love_pct - outparty_pct, NA_real_),
    
    affpol_linear = case_when(
      wave == "aapre-election"   ~ inpartyft - outpartyft,
      wave %in% c("won","lost") ~ post_affpol_linear,
      TRUE ~ NA_real_
    ),
    affpol_pct = case_when(
      wave == "aapre-election"   ~ love_pct - outparty_pct,
      wave %in% c("won","lost") ~ post_affpol_pct,
      TRUE ~ NA_real_
    )
  )

# re-run models and export
# Model 1: Love (in-party warmth, harmonized linear)
model_love_linear <- plm(
  love_linear ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
love_linear_vcov <- vcovHC(model_love_linear, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_love_linear, vcov = love_linear_vcov)

# Model 2: Hate (out-party warmth, harmonized linear)
model_hate_linear <- plm(
  outparty_linear ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
hate_linear_vcov <- vcovHC(model_hate_linear, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_hate_linear, vcov = hate_linear_vcov)

# Model 3: Affective polarization (in - out, harmonized linear)
model_affpol_linear <- plm(
  affpol_linear ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
affpol_linear_vcov <- vcovHC(model_affpol_linear, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_affpol_linear, vcov = affpol_linear_vcov)


# --- Percentile harmonized versions --------------------------------------
# Model 1: Love (in-party warmth, harmonized percentile)
model_love_pct <- plm(
  love_pct ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
love_pct_vcov <- vcovHC(model_love_pct, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_love_pct, vcov = love_pct_vcov)

# Model 2: Hate (out-party warmth, harmonized percentile)
model_hate_pct <- plm(
  outparty_pct ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
hate_pct_vcov <- vcovHC(model_hate_pct, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_hate_pct, vcov = hate_pct_vcov)

# Model 3: Affective polarization (in - out, harmonized percentile)
model_affpol_pct <- plm(
  affpol_pct ~ wave,
  data = analysis_long[analysis_long$year >= 1996,],
  index = c("VCF0006a"),
  model = "within"
)
affpol_pct_vcov <- vcovHC(model_affpol_pct, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_affpol_pct, vcov = affpol_pct_vcov)

# Export to HTML
# Helper: significance stars
get_stars <- function(pval) {
  ifelse(pval < 0.001, "***",
         ifelse(pval < 0.01, "**",
                ifelse(pval < 0.05, "*", "")))
}

# Function to tidy and format one model
tidy_model <- function(model, vcov) {
  broom::tidy(coeftest(model, vcov)) %>%
    rename(term = 1, estimate = 2, std.error = 3, statistic = 4, p.value = 5) %>%
    mutate(
      est_se = sprintf(
        "%.3f%s<br>(%.3f)",
        estimate, get_stars(p.value), std.error
      )
    ) %>%
    select(term, est_se)
}

# Tidy all six models
affpol_lin   <- tidy_model(model_affpol_linear, affpol_linear_vcov)
affpol_pct   <- tidy_model(model_affpol_pct, affpol_pct_vcov)
love_lin     <- tidy_model(model_love_linear, love_linear_vcov)
love_pct     <- tidy_model(model_love_pct, love_pct_vcov)
outparty_lin <- tidy_model(model_hate_linear, hate_linear_vcov)
outparty_pct <- tidy_model(model_hate_pct, hate_pct_vcov)

# Combine into one wide table
results <- purrr::reduce(
  list(
    affpol_lin %>% rename(affpol_linear = est_se),
    affpol_pct %>% rename(affpol_pct   = est_se),
    love_lin   %>% rename(love_linear  = est_se),
    love_pct   %>% rename(love_pct     = est_se),
    outparty_lin %>% rename(outparty_linear = est_se),
    outparty_pct %>% rename(outparty_pct   = est_se)
  ),
  full_join,
  by = "term"
)

calc_r2 <- function(model) {
  yhat <- predict(model)
  y <- model$model[[1]]
  if (length(y) != length(yhat)) return(NA_real_)
  cor(y, yhat, use = "complete.obs")^2
}

# Extract stats
stats <- tibble::tibble(
  model = c("affpol_linear","affpol_pct","love_linear","love_pct","outparty_linear","outparty_pct"),
  rsq   = c(
    calc_r2(model_affpol_linear),
    calc_r2(model_affpol_pct),
    calc_r2(model_love_linear),
    calc_r2(model_love_pct),
    calc_r2(model_hate_linear),
    calc_r2(model_hate_pct)
  ),
  n     = c(
    nobs(model_affpol_linear),
    nobs(model_affpol_pct),
    nobs(model_love_linear),
    nobs(model_love_pct),
    nobs(model_hate_linear),
    nobs(model_hate_pct)
  )
)

# Format into string rows (rounding R² for readability)
rsq_row <- tibble(term = "R-squared") %>%
  bind_cols(t(sprintf("%.3f", stats$rsq)) %>%
              as.data.frame() %>%
              setNames(stats$model))

n_row <- tibble(term = "N") %>%
  bind_cols(t(as.character(stats$n)) %>%
              as.data.frame() %>%
              setNames(stats$model))

# Bind to results
results <- bind_rows(results, rsq_row, n_row)

# Reorder columns
results <- results %>%
  dplyr::select(term, affpol_linear, affpol_pct, love_linear, love_pct, outparty_linear, outparty_pct)

# Export
tab_html <- htmlTable(
  results,
  header = c("Variable", "AffPol Linear", "AffPol Percentile",
             "Inparty Linear", "Inparty Percentile",
             "Outparty Linear", "Outparty Percentile"),
  rnames = FALSE,
  caption = "Panel models with cluster-robust SEs (Arellano HC0).<br>
             Estimates with significance stars: *** p < .001, ** p < .01, * p < .05."
)

# Save to file
save_html(tab_html,
          file = "C:/Users/Seth/OneDrive/Research/Phillips-Warner Industries/Partisan Competition/model_results.html")


###

# 5. Standardization for question wording

# Model 1: Love (standardized)
model_z_love <- plm(
  z_love ~ wave,
  data = analysis_long[analysis_long$year >= 1996, ],
  index = c("VCF0006a"),
  model = "within"
)
# Cluster-robust SEs (clustered by individual)
z_love_vcov <- vcovHC(model_z_love, method = "arellano", type = "HC0", cluster = "group")
# Print robust coefficients
coeftest(model_z_love, vcov = z_love_vcov)

# Model 2: Hate (standardized)
model_z_hate <- plm(
  z_outparty_love ~ wave,
  data = analysis_long[analysis_long$year >= 1996, ],
  effect = "individual",
  index = c("VCF0006a"),
  model = "within"
)
z_hate_vcov <- vcovHC(model_z_hate, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_z_hate, vcov = z_hate_vcov)

# Model 3: Affective Polarization (standardized)
model_z_affpol <- plm(
  z_affpol ~ wave,
  data = analysis_long[analysis_long$year >= 1996, ],
  effect = "individual",
  index = c("VCF0006a"),
  model = "within"
)
z_affpol_vcov <- vcovHC(model_z_affpol, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_z_affpol, vcov = z_affpol_vcov)

# collect R-squared
cor(predict(model_z_love, model_z_love$model), model_z_love$model$z_love) ^ 2
cor(predict(model_z_hate, model_z_hate$model), model_z_hate$model$z_outparty_love) ^ 2
cor(predict(model_z_affpol, model_z_affpol$model), model_z_affpol$model$z_affpol) ^ 2


# b. Interact with time
analysis_long$days <- abs(analysis_long$days)
# Model 1: Love (with wave × days)
analysis_long$love_temp <- analysis_long$love*100
model_love_days <- plm(
  love_temp ~ wave * days,
  data = analysis_long[analysis_long$year >= 1996, ],
  index = c("VCF0006a"),
  model = "within"
)
love_days_vcov <- vcovHC(model_love_days, method = "arellano", type = "HC0", cluster = "group")
coeftest(model_love_days, vcov = love_days_vcov)
summary(model_love_days)

# Find the R-squared:
cor(predict(model_love_days), model.response(model.frame(model_love_days)))^2


###

# 5. Visualize by year

# Election years to analyze
years <- c(1996, 2004, 2008, 2012, 2016, 2020, 2024)

# Function to extract estimates and CIs
get_wave_estimates <- function(data, year, outcome) {
  d <- data[data$year == year, ]
  model <- plm(
    reformulate("wave", response = outcome),
    data = d,
    index = c("VCF0006a"),
    model = "within"
  )
  vcov_cluster <- vcovHC(model, method = "arellano", type = "HC0", cluster = "group")
  ct <- coeftest(model, vcov = vcov_cluster)
  out <- data.frame(
    term = rownames(ct),
    estimate = ct[,1],
    se = ct[,2],
    outcome = outcome,
    year = year
  )
  out <- out[out$term %in% c("wavelost", "wavewon"), ]
  out$ci_low <- out$estimate - 1.96 * out$se
  out$ci_high <- out$estimate + 1.96 * out$se
  return(out)
}

# Run and collect results
results <- do.call(rbind, lapply(years, function(y) {
  bind_rows(
    get_wave_estimates(analysis_long, y, "love"),
    get_wave_estimates(analysis_long, y, "outparty_love"),
    get_wave_estimates(analysis_long, y, "affpol")
  )
}))

# Relabel for plotting
results_clean <- results %>%
  mutate(
    term = recode(term, wavelost = "Lost", wavewon = "Won"),
    outcome = recode(outcome,
                     affpol = "Polarization",
                     outparty_love = "Out-Party Affect",
                     love = "In-Party Affect"),
    outcome = factor(outcome, levels = c("Polarization", "Out-Party Affect", "In-Party Affect")),
    year = as.factor(year)
  )

# Plot
ggplot(results_clean, aes(x = term, y = estimate, fill = term)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2,
                position = position_dodge(width = 0.8)) +
  facet_grid(outcome ~ year, switch = "y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  labs(
    x = "Election Outcome",
    y = "Estimated Effect (vs. Pre-Election)",
    fill = "Outcome",
    title = "Estimated Change in Partisan Affect Post-Election by Year and Outcome"
  ) +
  scale_fill_grey(start = 0.3, end = 0.7) +
  theme_bw(base_size = 13) +
  theme(
    strip.placement = "outside",
    strip.text.y.left = element_text(angle = 0, size = 13, face = "bold"),
    strip.text.x = element_text(size = 13),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 13),
    panel.spacing.y = unit(1.6, "lines"),  # More space between rows
    legend.position = "none"
  )
